Example: Multioutput regression¶
This example shows how to use ATOM to make predictions on a multioutput regression dataset. One of the models used is an MLP regressor implemented with Keras using scikeras.
The data used is a synthetic dataset created using sklearn's make_regression function.
Load the data¶
In [1]:
Copied!
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import numpy as np
from atom import ATOMRegressor, ATOMModel
from sklearn.datasets import make_regression
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import numpy as np
from atom import ATOMRegressor, ATOMModel
from sklearn.datasets import make_regression
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
In [2]:
Copied!
# Create data
# n_targets=3 makes y two-dimensional (1000 rows, 3 target columns),
# which turns this into a multioutput regression task
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3)
# Create data
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3)
In [3]:
Copied!
# Create the neural network
class NeuralNetwork(KerasRegressor):
    """Multioutput multilayer perceptron."""

    @staticmethod
    def _keras_build_fn(n_inputs, n_outputs, **kwargs):
        """Create the model's architecture."""
        # Two hidden relu layers of 20 units each, then a linear output
        # layer with one unit per target column.
        architecture = Sequential(
            [
                Dense(20, input_dim=n_inputs, activation="relu"),
                Dense(20, activation="relu"),
                Dense(n_outputs),
            ]
        )
        architecture.compile(loss="mse", optimizer="adam")
        return architecture
# Create the neural network
class NeuralNetwork(KerasRegressor):
    """Multioutput multilayer perceptron."""

    @staticmethod
    def _keras_build_fn(n_inputs, n_outputs, **kwargs):
        """Create the model's architecture."""
        net = Sequential()
        # Two hidden relu layers, then a linear output with one unit per target.
        for layer in (
            Dense(20, input_dim=n_inputs, activation="relu"),
            Dense(20, activation="relu"),
            Dense(n_outputs),
        ):
            net.add(layer)
        net.compile(loss="mse", optimizer="adam")
        return net
In [4]:
Copied!
# Convert the model to an ATOM model
# Wrapping the scikeras estimator lets ATOM train it like a built-in model;
# n_outputs=y.shape[1] matches the 3 target columns created above
model = ATOMModel(
estimator=NeuralNetwork(n_inputs=5, n_outputs=y.shape[1], epochs=100, verbose=0),
name="NN",
needs_scaling=True, # Applies automated feature scaling before fitting
native_multioutput=True, # Do not use a multioutput meta-estimator wrapper
)
# Convert the model to an ATOM model
model = ATOMModel(
estimator=NeuralNetwork(n_inputs=5, n_outputs=y.shape[1], epochs=100, verbose=0),
name="NN",
needs_scaling=True, # Applies automated feature scaling before fitting
native_multioutput=True, # Do not use a multioutput meta-estimator wrapper
)
Run the pipeline¶
In [5]:
Copied!
# Initialize atom; verbose=2 prints the configuration and dataset stats shown below
atom = ATOMRegressor(X, y=y, verbose=2, random_state=1)
atom = ATOMRegressor(X, y=y, verbose=2, random_state=1)
<< ================== ATOM ================== >> Configuration ==================== >> Algorithm task: Multioutput regression. Dataset stats ==================== >> Shape: (1000, 13) Train set size: 800 Test set size: 200 ------------------------------------- Memory: 104.13 kB Scaled: True Outlier values: 27 (0.3%)
In [6]:
Copied!
# Show the models that natively support multioutput tasks
# available_models returns a dataframe-like overview; keep only the relevant columns
atom.available_models()[["acronym", "model", "native_multioutput"]]
# Show the models that natively support multioutput tasks
atom.available_models()[["acronym", "model", "native_multioutput"]]
Out[6]:
| acronym | model | native_multioutput | |
|---|---|---|---|
| 0 | AdaB | AdaBoost | False |
| 1 | ARD | AutomaticRelevanceDetermination | False |
| 2 | Bag | Bagging | False |
| 3 | BR | BayesianRidge | False |
| 4 | CatB | CatBoost | False |
| 5 | Tree | DecisionTree | True |
| 6 | Dummy | Dummy | False |
| 7 | EN | ElasticNet | False |
| 8 | ETree | ExtraTree | True |
| 9 | ET | ExtraTrees | True |
| 10 | GP | GaussianProcess | False |
| 11 | GBM | GradientBoostingMachine | False |
| 12 | Huber | HuberRegression | False |
| 13 | hGBM | HistGradientBoosting | False |
| 14 | KNN | KNearestNeighbors | True |
| 15 | Lasso | Lasso | False |
| 16 | Lars | LeastAngleRegression | False |
| 17 | LGB | LightGBM | False |
| 18 | lSVM | LinearSVM | False |
| 19 | MLP | MultiLayerPerceptron | False |
| 20 | OLS | OrdinaryLeastSquares | False |
| 21 | OMP | OrthogonalMatchingPursuit | False |
| 22 | PA | PassiveAggressive | False |
| 23 | RNN | RadiusNearestNeighbors | True |
| 24 | RF | RandomForest | True |
| 25 | Ridge | Ridge | False |
| 26 | SGD | StochasticGradientDescent | False |
| 27 | SVM | SupportVectorMachine | False |
| 28 | XGB | XGBoost | False |
In [7]:
Copied!
# Note we only added 5 informative features to the dataset, let's remove the rest
# If we use a model with no native support for multioutput as solver, specify the
# rfe's importance_getter parameter and return the mean of the coefficients over the
# target columns
# (rfe = recursive feature elimination; the wrapped solver exposes one fitted
# sub-estimator per target in x.estimators_, each with its own coef_)
atom.feature_selection(
strategy="rfe",
solver="ols", # This becomes MultiOutputRegressor(OLS)
n_features=5,
importance_getter=lambda x: np.mean([e.coef_ for e in x.estimators_], axis=0),
)
# Note we only added 5 informative features to the dataset, let's remove the rest
# If we use a model with no native support for multioutput as solver, specify the
# rfe's importance_getter parameter and return the mean of the coefficients over the
# target columns
atom.feature_selection(
strategy="rfe",
solver="ols", # This becomes MultiOutputRegressor(OLS)
n_features=5,
importance_getter=lambda x: np.mean([e.coef_ for e in x.estimators_], axis=0),
)
Fitting FeatureSelector... Performing feature selection ... --> rfe selected 5 features from the dataset. --> Dropping feature x0 (rank 6). --> Dropping feature x5 (rank 5). --> Dropping feature x6 (rank 3). --> Dropping feature x7 (rank 2). --> Dropping feature x9 (rank 4).
In [8]:
Copied!
# Let's train a native, non-native and our custom model
# Lasso has no native multioutput support, RF does, and NN was flagged
# native_multioutput=True above
atom.run(models=["Lasso", "RF", model], metric="mse")
# Let's train a native, non-native and our custom model
atom.run(models=["Lasso", "RF", model], metric="mse")
Training ========================= >> Models: Lasso, RF, NN Metric: mse Results for Lasso: Fit --------------------------------------------- Train evaluation --> mse: -5.1516 Test evaluation --> mse: -5.5774 Time elapsed: 0.031s ------------------------------------------------- Time: 0.031s Results for RandomForest: Fit --------------------------------------------- Train evaluation --> mse: -200.7336 Test evaluation --> mse: -1494.3406 Time elapsed: 0.706s ------------------------------------------------- Time: 0.706s Results for NeuralNetwork: Fit --------------------------------------------- Train evaluation --> mse: -111.3789 Test evaluation --> mse: -105.2649 Time elapsed: 2.372s ------------------------------------------------- Time: 2.372s Final results ==================== >> Total time: 3.116s ------------------------------------- Lasso --> mse: -5.5774 ! RandomForest --> mse: -1494.3406 ~ NeuralNetwork --> mse: -105.2649
In [9]:
Copied!
# And check which of the models used a meta-estimator wrapper
# (the output below shows only Lasso got wrapped in MultiOutputRegressor)
for m in atom.models:
print(f"Estimator for {m} is: {atom[m].estimator}")
# And check which of the models used a meta-estimator wrapper
for m in atom.models:
print(f"Estimator for {m} is: {atom[m].estimator}")
Estimator for Lasso is: MultiOutputRegressor(estimator=Lasso(random_state=1), n_jobs=1) Estimator for RF is: RandomForestRegressor(n_jobs=1, random_state=1) Estimator for NN is: NeuralNetwork( model=None build_fn=None warm_start=False random_state=1 optimizer=rmsprop loss=None metrics=None batch_size=None validation_batch_size=None verbose=0 callbacks=None validation_split=0.0 shuffle=True run_eagerly=False epochs=100 n_inputs=5 n_outputs=3 name=NN needs_scaling=True native_multioutput=True native_multilabel=False has_validation=None )
Analyze the results¶
In [10]:
Copied!
# Use the target parameter in plots to specify which target column to use
# target=2 selects the third target column (zero-indexed)
atom.plot_residuals(target=2)
# Use the target parameter in plots to specify which target column to use
atom.plot_residuals(target=2)
In [11]:
Copied!
# Draw the error plot for each target column on a single 3x1 canvas
# NOTE(review): this cell raised a ValueError in the recorded run (see the
# traceback below) — plot_errors appears to break for multioutput targets here
with atom.canvas(3, 1, figsize=(900, 1300)):
atom.plot_errors(target=0)
atom.plot_errors(target=1)
atom.plot_errors(target=2)
with atom.canvas(3, 1, figsize=(900, 1300)):
atom.plot_errors(target=0)
atom.plot_errors(target=1)
atom.plot_errors(target=2)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[11], line 2 1 with atom.canvas(3, 1, figsize=(900, 1300)): ----> 2 atom.plot_errors(target=0) 3 atom.plot_errors(target=1) 4 atom.plot_errors(target=2) File ~\Documents\Python\ATOM\atom\utils\utils.py:2712, in crash.<locals>.wrapper(*args, **kwargs) 2709 cache["last_exception"] = ex 2710 args[0].logger.exception("Exception encountered:") -> 2712 raise ex File ~\Documents\Python\ATOM\atom\utils\utils.py:2704, in crash.<locals>.wrapper(*args, **kwargs) 2701 @wraps(f) 2702 def wrapper(*args, **kwargs) -> Any: 2703 try: # Run the function -> 2704 return f(*args, **kwargs) 2706 except Exception as ex: 2707 # If exception is not the same as last, write to log 2708 if ex is not cache["last_exception"] and args[0].logger: File ~\Documents\Python\ATOM\atom\plots\predictionplot.py:691, in PredictionPlot.plot_errors(self, models, rows, target, title, legend, figsize, filename, display) 689 from atom.models import OrdinaryLeastSquares 690 model = OrdinaryLeastSquares(goal=self.task.goal, branches=self._branches) --> 691 estimator = model._get_est().fit(bk.DataFrame(y_true), y_pred) 693 fig.add_trace( 694 self._draw_line( 695 x=(x := np.linspace(y_true.min(), y_true.max(), 100)), (...) 
703 ) 704 ) 706 self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis) File ~\Documents\Python\ATOM\venv310\lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs) 1145 estimator._validate_params() 1147 with config_context( 1148 skip_parameter_validation=( 1149 prefer_skip_nested_validation or global_skip_validation 1150 ) 1151 ): -> 1152 return fit_method(estimator, *args, **kwargs) File ~\Documents\Python\ATOM\venv310\lib\site-packages\sklearn\multioutput.py:248, in _MultiOutputEstimator.fit(self, X, y, sample_weight, **fit_params) 245 check_classification_targets(y) 247 if y.ndim == 1: --> 248 raise ValueError( 249 "y must have at least two dimensions for " 250 "multi-output regression but has only one." 251 ) 253 if _routing_enabled(): 254 routed_params = process_routing( 255 obj=self, 256 method="fit", 257 other_params=fit_params, 258 sample_weight=sample_weight, 259 ) ValueError: y must have at least two dimensions for multi-output regression but has only one.